clear all
*global path = "C:/Users/KShih/Dropbox/Work/Research Projects/Active/chinese_students_2017/"
global path = "C:\Users\creto\Desktop\chinese_students_2017"

/*
TO DO:
-pretrend trade graph
-postperiod trade graph (first stage)
-trend of chinese students in US overall in pre period
-table form

*/

//OPEN LOG FILE
cap log close
*log using "$path/Logfiles/analysis_expweighted_08feb2021.log", replace


clear
set more off
set matsize 10000



/****************************************************************************/

****************       STEP 1: LOAD DATA     **************

/****************************************************************************/

	
	//LOAD TRADE DATA BY CITY - NOTE NEW DATA WITH CORRECTED PREFECTURE AGG FOR TRADE, STUD, AND NTR IV.
	use "$path/Data/6.AnalysisData/data_merge08feb2021.dta", clear
		tab year balanced, m //431 cities in trade data
		drop if year==.
	
		//merge in exports by destination
		merge 1:1 chinacity year using "$path/Data/1.ChinaData/Trade/CHNExport_bydest"
		drop if _merge==2
		drop _merge
		
		*population is in units of 10,000 persons, conver to 1,000 persons
		sum pop
		replace pop = pop*10
		label var pop "in 1,000s of persons"
		sum pop
		
		//Merge in the number of NEW establishments according to registry data (August 2021)
		/*	merge 1:1 chinacity year using "$path/Data/1.ChinaData/NewEstablishment/NewFirms8015_cities"
		drop if _merge==2
		drop _merge
		tsset chinacity year
		replace fnumber = fnumber/pop
		replace fnumber_manuf = fnumber_manuf/pop */
		merge 1:1 chinacity year using "$path/Data/1.ChinaData/NewEstablishment/nfirms_census"
		drop if _merge==2
		drop _merge
		tsset chinacity year
		merge 1:1 chinacity year using "$path/Data/1.ChinaData/NewEstablishment/nfirms_asip"
		drop if _merge==2
		drop _merge
		tsset chinacity year
		merge 1:1 chinacity year using "$path/Data/1.ChinaData/NewEstablishment/invst"
		drop if _merge==2
		drop _merge
		tsset chinacity year
		merge 1:1 chinacity year using "$path/Data/1.ChinaData//StudyAbroadAgency/Data_StudyAbroadAgency8015"
		rename fnumber newagencies
		bys chinacity: egen totagenc_t = total(newagencies) if year < 2002 //some will have 0
		bys chinacity: egen totagenc = max(totagenc_t)
		gen hadagenc = (totagenc > 0 )
		drop if _merge==2
		drop _merge
		tsset chinacity year
		bys chinacity: gen shc_agenc_0413 = ((newagencies - L9.newagencies) - (L13.newagencies - L16.newagencies)) / (L9.pop) if year==2013
		bys chinacity: gen shc_agenc_0113 = ((newagencies - L12.newagencies) - (L13.newagencies - L16.newagencies)) / (L9.pop) if year==2013
		gen ivagenc = ntrgap_exw97*totagenc
		gen ivhadagenc = ntrgap_exw97*hadagenc
*----------------------------------------------------------------------------*			
/****************************************************************************/

**************       STEP 2: VARIABLE CONSTRUCTION       *********************

******************************************************************************
*----------------------------------------------------------------------------*			
	
	**NEW POP
	preserve
	use "$path/Data/1.ChinaData/Population/CityPop2005", clear
	drop city year
	destring citycode, replace
	rename pop pop05
	rename pop_urb pop_urb05
		replace pop05 = pop05*10
		replace pop_urb05 = pop_urb05*10
	tempfile pop05
	save "`pop05'"
	restore
	
	merge m:1 citycode using "`pop05'"
	drop _merge
/*	
	//old pop
	rename pop oldpop
	rename pop05 pop
	
**CAN DELETE ALL THE ABOVE IFPOP DOESNT WORK
	*/
	gen oldpop=pop
	
	** Time varying tariffs
	
	**CALCULATE LOG OF VARIABLES
	gen gdppc = gdp/(oldpop/10)
		foreach x in emp oldpop dvalue dvalue_processing gdp gdppc wid inv_realest inv_fdi num_college num_midschool libbook rideship numstd_college numstd_midschool ///
					numfcty_college numfcty_midschool num1body_c num1body_cp num1title_c num1title_cp num2title_c num2title_cp num2body_c num2body_cp ///
					tot_amtfunds_personal tot_amtfunds_other tot_amtfunds_total tot_amtcost_tuition tot_amtcost_living tot_amtcost_other tot_amtcost_total ///
					dvalue_usa dvalue_can dvalue_usa_proc dvalue_usa_all dvalue_nonusa dvalue_noeurope dvalue_nafta dvalue_northam dvalue_mex dvalue_hk ///
					dvalue_viet dvalue_japan dvalue_sing dvalue_uae dvalue_aus dvalue_uk dvalue_france dvalue_europe dvalue_asia dvalue_all /// 
					 {
					

			gen ln_`x' = ln(`x')
		
		}
	
	**CALCULATE DEPENDENT VARIABLE FOR FIRST STAGE - CHANGE IN ACTUAL EXPORTS
	
		sort citycode year
		foreach x in dvalue dvalue_processing gdp oldpop gdppc wid inv_realest inv_fdi emp /// 
					 dvalue_usa dvalue_can dvalue_usa_proc dvalue_usa_all dvalue_nonusa dvalue_noeurope dvalue_nafta dvalue_northam dvalue_mex dvalue_hk ///
					dvalue_viet dvalue_japan dvalue_sing dvalue_uae dvalue_aus dvalue_uk dvalue_france dvalue_europe dvalue_asia dvalue_all {  // Any other variables??
				
			bys citycode: gen lnc_`x'_0413 = (ln_`x' - ln_`x'[_n-9]) if year==2013
			bys citycode: gen lnc_`x'_0113 = (ln_`x' - ln_`x'[_n-12]) if year==2013
			bys citycode: gen lnc_`x'_0013 = (ln_`x' - ln_`x'[_n-13]) if year==2013
			bys citycode: gen lnc_`x' = (ln_`x' - ln_`x'[_n-1]) if year>2000
			
			if "`x'" == "gdp"| "`x'" == "oldpop"| "`x'" == "gdppc" {
			bys citycode: gen lnc_`x'_0213 = (ln_`x' - ln_`x'[_n-11]) if year==2013
			}
		}
	
	
 
		
	**GENERATE PRETREND OUTCOME VARIABLES
		sort citycode year
		
		bys citycode: gen pch_numstd_college_9700 = (numstd_college[_n-13] - numstd_college[_n-16])/(pop[_n-16]) if year==2013
		bys citycode: gen pch_num_college_9700 = (num_college[_n-13] - num_college[_n-16])/(pop[_n-16]) if year==2013
		
		foreach var of varlist dvalue emp gdp inv_realest inv_fdi num_college num_midschool libbook rideship numstd_college numstd_midschool numfcty_college /// 
							   numfcty_midschool num1body_c num1body_cp num1title_c num1title_cp num2title_c num2title_cp num2body_c num2body_cp {
							   
			bys citycode: gen lnc_`var'_9700 = (ln_`var'[_n-13] - ln_`var'[_n-16]) if year==2013
			bys citycode: gen pct_`var'_9700 = (`var'[_n-13] - `var'[_n-16])/`var'[_n-16] if year==2013
			bys citycode: gen lnc_`var'_9496 = (ln_`var'[_n-17] - ln_`var'[_n-19]) if year==2013
			bys citycode: gen pct_`var'_9496 = (`var'[_n-17] - `var'[_n-19])/`var'[_n-19] if year==2013
			label var lnc_`var'_9700  "$\Delta\ln(`var'^{97-00})$"
			label var lnc_`var'_9496  "$\Delta\ln(`var'^{94-96})$"
			
		}

	
	**CONSTRUCT OUTCOME STUDENT VARIABLES: NOTE STUDENTS CAN ONLY BE MEASURED 04-13

		rename stud_3 stud_as
		rename stud_4 stud_ba
		rename stud_5 stud_ma
		rename stud_6 stud_dr
	
		*how many cities with population in 2004? Answer is 280
		*codebook citycode if year==2004 & pop!=.
		
		sort citycode year
		foreach lvl in as ba ma dr tot {
		
			**log change. Need to add 1
			bys citycode: gen lnc_stud_`lvl'_0413 = ln(stud_`lvl') - ln(stud_`lvl'[_n-9]) if year==2013
			
			
			**pct change as share of pop
			bys citycode: gen shc_stud_`lvl'_0413 = (stud_`lvl' - stud_`lvl'[_n-9]) / (pop[_n-9]) if year==2013
			bys citycode: gen shc_stud_`lvl'_0013 = (stud_`lvl' - stud_`lvl'[_n-13]) / (pop[_n-13]) if year==2013
			bys citycode: gen shc_stud_`lvl'_0113 = (stud_`lvl' - stud_`lvl'[_n-12]) / (pop[_n-12]) if year==2013
			bys citycode: gen shc_stud_`lvl'_0213 = (stud_`lvl' - stud_`lvl'[_n-11]) / (pop[_n-11]) if year==2013
			bys citycode: gen shc_stud_`lvl'_0208 = (stud_`lvl'[_n-5] - stud_`lvl'[_n-11]) / (pop[_n-11]) if year==2013
			
			bys citycode: gen shc_stud_`lvl'_pop05_0413 = (stud_`lvl' - stud_`lvl'[_n-9]) / (pop05[_n-9]) if year==2013
			
			
		
		}


	**CONSTRUCT OUTCOME STUDENT EXPENDITURES AND FUNDS: NOTE STUDENTS CAN ONLY BE MEASURED 04-13

		*how many cities with population in 2004? Answer is 280
		*codebook citycode if year==2004 & pop!=.
		rename tot_amtfunds_personal funds_personal 
		rename tot_amtfunds_other funds_other  
		rename tot_amtfunds_total funds_total 
		rename tot_amtcost_tuition cost_tuition 
		rename tot_amtcost_living cost_living 
		rename tot_amtcost_other cost_other 
		rename tot_amtcost_total cost_total

		sort citycode year
		
		
		foreach x in funds_personal funds_other funds_total cost_tuition cost_living cost_other cost_total amtfunds_personal amtfunds_other amtfunds_total amtcost_tuition amtcost_living amtcost_other amtcost_total {
		replace `x' = 0 if `x'==. & year>=2004
		bys citycode: gen lnc_`x'_0413 = ln(`x') - ln(`x'[_n-9]) if year==2013
		bys citycode: gen shc_`x'_0413 = (`x' - `x'[_n-9]) / pop[_n-9] if year==2013
		
		}
		
		

	**EMPLOYMENT WEIGHTS, USE 2003 POP OR EMP
		sort citycode year
		bys citycode: gen empwt03 = emp[_n-10] if year==2013 //2013 is the only year used for regressions, need employment in those dates (use 2003)
		bys citycode: gen popwt03 = pop[_n-10] if year==2013 //2013 is the only year used for regressions, need employment in those dates (use 2003)
		bys citycode: gen popwt97 = pop[_n-16] if year==2013 //2013 is the only year used for regressions, need employment in those dates (use 2003)
		bys citycode: gen popwt = pop[_n-1] if year>2004 //2013 is the only year used for regressions, need employment in those dates (use 2003)

	**CITY LEVEL CONTROL FOR POLICIES
		gen policies_cp = num2title_cp + num2body_cp + num1title_cp + num1body_cp
		bys citycode: egen policies_cp_0013_tot = total(policies_cp) if year>=2000 & year<=2013
		bys citycode: egen policies_cp_0013 = min(policies_cp_0013_tot)
		drop policies_cp_0013_tot
		
		bys citycode: egen policies_cp_0413_tot = total(policies_cp) if year>=2004 & year<=2013
		bys citycode: egen policies_cp_0413 = min(policies_cp_0413_tot)
		drop policies_cp_0413_tot
		
		bys citycode: egen policies_cp_9700_tot = total(policies_cp) if year>=1997 & year<=2000
		bys citycode: egen policies_cp_9700 = min(policies_cp_9700_tot)
		drop policies_cp_9700_tot


		
		gen largeprefecture= (citycode==1100|citycode==1200|citycode==3100|citycode==5000) //beijing, tianjin, shanghai, chongqing
		tab city if largeprefecture==1

		gen stringcity = string(citycode)
		gen capital = (substr(stringcity, 3, 2)=="01")
		drop stringcity
		
			
		gen coastal = 0
		replace coastal = 1 if citycode==2102 //Dalian
		replace coastal = 1 if citycode==2106 //Dandong
		replace coastal = 1 if citycode==2107 //Jinzhou
		replace coastal = 1 if citycode==1303 //Qinghuangdao
		replace coastal = 1 if citycode==3706 //Yantai
		replace coastal = 1 if citycode==3702 //Qingdao
		replace coastal = 1 if citycode==3301 //Hangzhou
		replace coastal = 1 if citycode==3302 //Ningbo
		replace coastal = 1 if citycode==3303 //Wenzhou
		replace coastal = 1 if citycode==3501 //Fuzhou
		replace coastal = 1 if citycode==3502 //Xiamen	
		replace coastal = 1 if citycode==3503 //Putian
		replace coastal = 1 if citycode==4505 //Beihi
		replace coastal = 1 if citycode==4509 //Qinzhou
		replace coastal = 1 if citycode==4601 //Haikou


		
		*WID of nonUSA and of non China as an exporter

		merge 1:1 chinacity year using "$path/Data/6.AnalysisData//WID_expweighted97_pref_rev_USimp_Combined" 
		drop if _merge==2
		drop _merge
		
		sort citycode year
*		bys citycode: gen ch_widnousa_0013 = (XD_nonus - XD_nonus[_n-13]) if year==2013	
*		bys citycode: gen ch_widnochina_0013 = (XD_nonus - XD_nonus[_n-13]) if year==2013	
*		bys citycode: gen iv_wid_rev = (ln(XD1) - ln(XD1[_n-9])) if year==2013	
*		bys citycode: gen iv_wid_rev2 = (ln(XD2) - ln(XD2[_n-9])) if year==2013	
		bys citycode: gen iv_wid_rev = (ln(XD_row) - ln(XD_row[_n-9])) if year==2013	
		bys citycode: gen iv_wid_rev_eastasia = (ln(XD_eastasia) - ln(XD_eastasia[_n-9])) if year==2013	
		
		*NTR IV using emp weight in 1990
		merge m:1 citycode using "$path/Data/6.AnalysisData/IV1/gapsweighted90", keepusing(ntrgap_empweighted90)
		drop if _merge==2
		drop _merge
		label var ntrgap_empweighted90 "\$PNTR^{1990}_{c}\$"
		
		*NTR IV using HS6 level trade data
		merge m:1 citycode using "$path/Data/6.AnalysisData/IV1_July2019/gapsexpweighted97_hscodes"
		drop if _merge==2
		drop _merge
		label var ntrgap_expweighted97_hs6 "\$PNTR^{HS}_{c}\$"
		
		*NTR IV using ONLY USA TRADE DATA
		merge m:1 citycode using "$path/Data/6.AnalysisData/IV1_July2019/gapsexpw97USA"
		drop if _merge==2
		drop _merge
		label var ntrgap_expwUSA "\$PNTR^{USA}_{c}\$"

		*NTR IV using ONLY USA TRADE DATA AND EMPLOYMENT DENOMINATORS
		merge m:1 citycode using "$path/Data/6.AnalysisData/IV1_July2019/gapsgdpwUSA"
		drop if _merge==2
		drop _merge
		label var ntrgap_gdpwUSA "\$PNTR^{USAX/GDP}_{c}\$"
		label var ntrgap_saleswUSA "\$PNTR^{USAX/SALES}_{c}\$"

		
		*NTR IV dropping big rotemberg industries 
		gen citycode_custom = string(citycode)
		merge m:1 citycode_custom using "$path/Data/6.AnalysisData/Robust-drop_big_ind/gaps_expweighted97_ExBigInd", keepusing(ntrgap_expweighted97)
		drop if _merge==2
		drop _merge citycode_custom
		rename ntrgap_expweighted97 ntrgap_rotem
		label var ntrgap_rotem "\$PNTR^{IND}_{c}\$"
		
		*SKILLED AND UNSKILLED NTR IV SHOCKS
		merge m:1 chinacity using "$path/Data/6.AnalysisData/Shocks_by_Skill/tradeshock_ntrgap&mfa_byskill_rev"
		drop if _merge==2
		drop _merge
		
		

		//NEW CONTROLS FROM ARIEL
		
		*exports license requirements interacting with change from 00-07
		merge m:1 chinacity using "$path/Data/6.AnalysisData/IV1_July2019/expreq0007_expweighted97"
		drop if _merge==2
		drop _merge
		
		*import tariffs & input tariffs
		merge m:1 chinacity using "$path/Data/6.AnalysisData/IV1_July2019/tariffsexpweighted97"
		drop if _merge==2
		drop _merge
		merge 1:1 chinacity year using "$path/Data/6.AnalysisData/tariffsexpweighted97_annual"
		drop if _merge==2
		drop _merge		
		gen ln_chinatariff_expw97 = ln(chinatariff_expw97)
		gen ln_inputtariff_expw97 = ln(inputtariff_expw97)
		
			
		**CLEAN UP, RENAME, LABEL
		rename ntrgap_exw97 iv_ntr
		rename ntrgap_expweighted97_hs6 iv_hs6_ntr
		rename ntrgap_expwUSA iv_ntr_USA
		rename ntrgap_gdpwUSA iv_ntr_gdpUSA
		rename ntrgap_saleswUSA iv_ntr_salesUSA
		rename mfa2001_exw97 iv_mfa
		rename lnc_wid_0413 iv_wid_0413
		rename lnc_wid_0113 iv_wid_0113
		rename lnc_wid_0013 iv_wid_0013
		
		label var iv_mfa "MFA Instrument"
		label var iv_wid_0413 "WID Instrument, log diff 04-13"
		label var iv_wid_0113 "WID Instrument, log diff 01-13"
		label var iv_wid_0013 "WID Instrument, log diff 00-13"
		*label var ntr_rate_exw97 "Initial NTR Rate"
		label var contract_cons_exw97 "Contract Enforcement"
		label var fraction_nfirm_exw97 "License Requirements in 2000"
		label var share_revenue_exw97 "License Requirements"
		label var mfadiff_exw97 "MFA Exposure Change"
		label var lnc_dvalue_0413 "$\Delta\ln(X^{04-13})$"
		label var lnc_dvalue_0113 "$\Delta\ln(X^{01-13})$"
		label var lnc_dvalue_0013 "$\Delta\ln(X^{00-13})$"
		label var lnc_dvalue_nonusa_0013 "$\Delta\ln(X^{00-13}_{nonUSA})$"
		label var lnc_gdp_0413 "$\Delta\ln(GDP^{04-13})$"
		label var lnc_gdp_0113 "$\Delta\ln(GDP^{01-13})$"
		label var lnc_gdp_0013 "$\Delta\ln(GDP^{00-13})$"
		label var policies_cp_0013 "Cum. Total policies (city + province) 2000-2013"
		label var policies_cp_0413 "Cum. Total policies (city + province) 2004-2013"
		label var policies_cp_9700 "Cum. Total policies (city + province) 1997-2000"

		label var iv_ntr "\$PNTR_{c}\$"		
		
*----------------------------------------------------------------------------*			
/****************************************************************************/

***************   STEP 3: MERGE WAGE DATA AND CONSTRUCT VARS *****************

******************************************************************************
*----------------------------------------------------------------------------*				


*NEW Wages from Firm Level Data, provided by mingzhi
	preserve
		use "$path/Data/1.ChinaData/Wages/Wage_firmdata/temw1998", clear
		
		append using "$path/Data/1.ChinaData/Wages/Wage_firmdata/temw2000"
		append using "$path/Data/1.ChinaData/Wages/Wage_firmdata/temw2013"
		
		sort citycode year
		bys citycode: gen N=_N
		keep if N==3
		drop N
		reshape wide lnw_wa lnw_s lnw_wg nfirm, i(citycode) j(year)
		
		gen lnc_w_wa9813 = lnw_wa2013 - lnw_wa1998
		gen lnc_w_s9813 = lnw_s2013 - lnw_s1998
		gen lnc_w_wg9813 = lnw_wg2013 - lnw_wg1998
		
		gen lnc_w_wa0013 = lnw_wa2013 - lnw_wa2000
		gen lnc_w_s0013 = lnw_s2013 - lnw_s2000
		gen lnc_w_wg0013 = lnw_wg2013 - lnw_wg2000
		
		
		
		keep lnc* citycode
		gen year=2013
		
		tempfile firmwage
		save "`firmwage'"
	restore

	merge 1:1 citycode year using "`firmwage'"
	drop if _merge==2
	drop _merge



	
	
* UHS Housheold level data on other measures of wealth
	preserve
		use "$path/Data/1.ChinaData/Wages/UHS0209_cleaned_raw", clear	
		
		
		collapse (mean) inc inc_lab inc_bus inc_cap inc_trans liqdfy credit, by(chinacity year)
		lab var inc "Total Income"
		lab var inc_lab "Total Income: Wages"
		lab var inc_bus "Total Income: Self-own Busines"
		lab var inc_cap "Total Income: Capital Gain"
		lab var inc_trans "Total Income: Transfer"
		lab var liqdfy "Other Source: Sale of Property"
		lab var credit "Other Source: Income from lending"

		keep if year==2002|year==2008
		sort chinacity year
		bys chinacity: gen N=_N
		keep if N==2
		drop N
		unique chinacity
		
		reshape wide inc inc_lab inc_bus inc_cap inc_trans liqdfy credit, i(chinacity) j(year)
		
		foreach x in inc inc_lab inc_bus inc_cap inc_trans liqdfy credit {
			gen lnc_`x'_0208 = ln(`x'2008) - ln(`x'2002)
			}
			
		keep chinacity *0208	
		gen year=2013
		
		tempfile uhswealth0208
		save "`uhswealth0208'"
		
	restore
	
	merge 1:1 chinacity year using "`uhswealth0208'"	
	drop if _merge==2
	drop _merge
		
	
* Wage data from ??? is this Chinese Statistical Yearbook data? Available yealy from 1997-2013
	preserve
		use "$path/Data/1.ChinaData/Wages/wage_19942013", clear
		collapse (first) cityname, by(cityid)
		drop if cityid==.
		rename cityname cityname_h
		merge 1:m cityid using "$path/Data/1.ChinaData/Wages/wage_19942013"
		drop _merge
		rename cityid citycode
		drop if citycode==.
		drop if wage==.
		sort citycode year
		keep if year>=1997
			
		
		*fix citycodes for merge
		replace citycode = 1410 if cityname_h=="Jinzhong" 
		replace citycode = 3414 if cityname_h=="Xuancheng"
		replace citycode = 2314 if cityname_h=="Suihua"
		replace citycode = 4424 if cityname_h=="Jieyang"
		replace citycode = 4421 if cityname_h=="Chaozhou"
		replace citycode = 4429 if cityname_h=="Yunfu"
		replace citycode = 5115 if cityname_h=="Nanchong"
		
		drop if cityname_h == "Linfen"
		drop if cityname_h == "Yibin"
		drop if cityname_h == "Chaohu"
		
		
		*save file with yearly wages by city
		sort citycode year
		bys citycode: gen N=_N
		gen balwage9713 = (N==17) //balanced panel indicator for wages
		drop N
		tempfile wageyearly
		save "`wageyearly'"
		
		
		keep if year==2000|year==2004|year==2013
		keep wage citycode cityname_h year
		reshape wide wage, i(citycode cityname_h) j(year)
		
		gen lnc_wage_0413 = ln(wage2013) - ln(wage2004)
		gen lnc_wage_0013 = ln(wage2013) - ln(wage2000)
		
		keep citycode lnc_* cityname_h wage*
		tempfile lnc_wages
		gen year=2013 //to merge in with master data
		gen balanced=1 //to merge in with master data
		
	

	
		drop if lnc_wage_0013==. & lnc_wage_0413==.	
		

		save "`lnc_wages'"
	

	restore
		merge 1:1 citycode year using "`lnc_wages'"
		drop if _merge==2
		drop _merge
		

		merge 1:1 citycode year using "`wageyearly'"
		drop if _merge==2
		drop _merge
		


*Wage Data from Gaurav, wages by educ -- 2005 and 2013 
	preserve
			use "$path/Data/1.ChinaData/Wages/wage_2005_2015_by_edu", clear
			keep citycode year working* meanwage*
			reshape wide working* meanwage*, i(citycode) j(year)
			
			gen lnc_wage_high_0513 = ln(meanwage_high2013) - ln(meanwage_high2005)
			gen lnc_wage_low_0513 = ln(meanwage_low2013) - ln(meanwage_low2005)
			gen lnc_return_0513 = lnc_wage_high_0513 - lnc_wage_low_0513
			drop if lnc_wage_high_0513==.| lnc_wage_low_0513==.
			
			keep citycode lnc*
			gen year=2013 //needed for merge w master data
			gen balanced=1 //needed for merge w master data
			compress
			tempfile lnc_wage_educ
			save "`lnc_wage_educ'"
	restore
	
		merge 1:1 citycode year using "`lnc_wage_educ'"
		drop if _merge==2
		drop _merge

		
*Wage Data from Gaurav, wages by educ MINCERIZED -- 2005 and 2013 
	preserve
			use "$path/Data/1.ChinaData/Wages/Mincerized_Wages_08302019", clear
			keep cityid year res*
			rename cityid citycode
			reshape wide reslow_logwage reshigh_logwage res_logwage reslow_wage reshigh_wage res_wage, i(citycode) j(year)
			
			gen lnc_reswage_high_0515 = ln(reshigh_logwage2015) - ln(reshigh_logwage2005)
			gen lnc_reswage_low_0515 = ln(reslow_logwage2015) - ln(reslow_logwage2005)
			gen lnc_resreturn_0515 = lnc_reswage_high_0515 - lnc_reswage_low_0515
			drop if lnc_reswage_high_0515 ==.| lnc_reswage_low_0515==.
			
			keep citycode lnc*
			gen year=2013 //needed for merge w master data
			gen balanced=1 //needed for merge w master data
			compress
			tempfile lnc_reswage_educ
			save "`lnc_reswage_educ'"
	restore
	
		merge 1:1 citycode year using "`lnc_reswage_educ'"
		drop if _merge==2
		drop _merge
		
		
*Wage Data from GAURAV NEW, there was some error with the mincerized wages?
	preserve
			use "$path/Data/09082019Theta", clear
			
			rename cityid citycode
			gen totwage_high = meanwage_high * working_highpop
			gen totwage_low = meanwage_low * working_lowpop
			gen totwage = totwage_high+totwage_low
			gen working_pop = working_highpop + working_lowpop
			gen meanwage_tot = totwage/working_pop
			
			keep citycode year meanwage* working*
			reshape wide meanwage_high meanwage_low meanwage_tot working_lowpop working_highpop working_pop, i(citycode) j(year)
			
			gen lnc_wage_high_0515 = ln(meanwage_high2015) - ln(meanwage_high2005)
			gen lnc_wage_low_0515 = ln(meanwage_low2015) - ln(meanwage_low2005)
			gen lnc_wage_tot_0515 = ln(meanwage_tot2015) - ln(meanwage_tot2005)
			gen lnc_return_0515 = lnc_wage_high_0515 - lnc_wage_low_0515
			drop if lnc_wage_high_0515 ==.| lnc_wage_low_0515==.
			
			gen lnc_working_pop = ln(working_pop2015) - ln(working_pop2005)
			gen lnc_working_highpop = ln(working_highpop2015) - ln(working_highpop2005)
			gen lnc_working_lowpop = ln(working_lowpop2015) - ln(working_lowpop2005)
			
			keep citycode lnc*
			gen year=2013 //needed for merge w master data
			gen balanced=1 //needed for merge w master data
			compress
			tempfile lnc_reswage2_educ
			save "`lnc_reswage2_educ'"
	restore
	
		merge 1:1 citycode year using "`lnc_reswage2_educ'"
		drop if _merge==2
		drop _merge
				*/
		
		
***UHS Wage data, 190+ cities available, from Mingzhi
		
	merge 1:1 chinacity year using "$path/Data/1.ChinaData/Wages/IncDist_UHS0209"		
	drop if _merge==2
	unique chinacity if _merge==3
	drop _merge
		*gen change in wage 2002-07
		sort citycode year
		forvalues p=1(1)19 {
			bys citycode: gen ch_p`p' = p`p'[_n-6] - p`p'[_n-11] if year==2013
		}
		
		
***Updated UHS Wage data, 190+ cities available, from Mingzhi (NOT PERCENTILE, BUT AVERAGE WITHIN BINS OF 5%)
		
	merge 1:1 chinacity year using "$path/Data/1.ChinaData/Wages/UHS0209pct_cleaned"		
	drop if _merge==2
	unique chinacity if _merge==3
	drop _merge
		*gen change in wage 2002-09
		sort citycode year
		forvalues p=1(1)20 {
			bys citycode: gen lnc_inc`p' = ln(inc_`p'[_n-4]) - ln(inc_`p'[_n-11]) if year==2013
			bys citycode: gen lnc_inclab`p' = ln(inclab_`p'[_n-4]) - ln(inclab_`p'[_n-11]) if year==2013
			bys citycode: gen lnc_incbus`p' = ln(incbus_`p'[_n-4]) - ln(incbus_`p'[_n-11]) if year==2013
			bys citycode: gen lnc_inccap`p' = ln(inccap_`p'[_n-4]) - ln(inccap_`p'[_n-11]) if year==2013
			bys citycode: gen lnc_inctrans`p' = ln(inctrans_`p'[_n-4]) - ln(inctrans_`p'[_n-11]) if year==2013
			bys citycode: gen lnc_liqdfy`p' = ln(liqdfy_`p'[_n-4]) - ln(liqdfy_`p'[_n-11]) if year==2013
			bys citycode: gen lnc_credit`p' = ln(credit_`p'[_n-4]) - ln(credit_`p'[_n-11]) if year==2013
		}		
		

		
*----------------------------------------------------------------------------*			
/****************************************************************************/

*************   STEP 6: HOUSING PRICES DATA MERGE AND ANALYSIS ***************

******************************************************************************
*----------------------------------------------------------------------------*	
		
		preserve
			
			clear
			
		**Clean up COMMERCIAL prices
			use "$path/Data/1.ChinaData/Housing/WindInfomation_Real_Estate_Data_CityLevel_Annual", clear
			
			//merge in citycode crosswalk
			merge m:1 City Province using "$path/Data/5.Crosswalks/wind_cd4_crosswalk.dta"
			
			rename cd4 citycode
			rename Year year
			rename FloorSpaceofCommercializedBu floorspace_comm 
			rename E floorspace_res 
			rename ValueofCommercializedBuilding value_building_comm 
			rename ValueofSalesofCommercialized value_building_res
			keep year citycode floor* value_*
			destring citycode, replace
			keep if year>=2002 & year<=2013
			drop if floorspace_comm==. | value_building_comm==.
			
			collapse (sum) value_building_comm floorspace_comm, by(citycode year)
			sort citycode year
			bys citycode: gen N=_N
			gen bal_comm0213=(N==12)
			drop N
			
			tempfile comm
			save "`comm'"
			
		**Clean up RESIDENTIAL prices	
			use "$path/Data/1.ChinaData/Housing/WindInfomation_Real_Estate_Data_CityLevel_Annual", clear
			
			//merge in citycode crosswalk
			merge m:1 City Province using "$path/Data/5.Crosswalks/wind_cd4_crosswalk.dta"
			
			rename cd4 citycode
			rename Year year
			rename FloorSpaceofCommercializedBu floorspace_comm 
			rename E floorspace_res 
			rename ValueofCommercializedBuilding value_building_comm 
			rename ValueofSalesofCommercialized value_building_res
			keep year citycode floor* value_*
			destring citycode, replace
			keep if year>=2005 & year<=2013
			drop if floorspace_res==. | value_building_res==.
			
			collapse (sum) value_building_res floorspace_res, by(citycode year)
			sort citycode year
			bys citycode: gen N=_N
			gen bal_res0513=(N==9)
			drop N
			
			tempfile res
			save "`res'"
			
		**MERGE RES AND COMM TOGETHER
			merge 1:1 citycode year using "`comm'"
			sort citycode year
			drop _merge
			
			
			reshape wide value_building_res floorspace_res value_building_comm floorspace_comm bal_comm0213 bal_res0513, i(citycode) j(year)
			reshape long value_building_res floorspace_res value_building_comm floorspace_comm bal_comm0213 bal_res0513, i(citycode) j(year)
			
		**LONG DIFFERENEC	
			gen ppsq_res = value_building_res/floorspace_res
			gen ppsq_comm = value_building_comm /floorspace_comm 
			
			sort citycode year
			bys citycode: gen lnc_ppsq_res_0513 = ln(ppsq_res) - ln(ppsq_res[_n-8]) if year==2013
			bys citycode: gen lnc_ppsq_comm_0213 = ln(ppsq_comm) - ln(ppsq_comm[_n-11]) if year==2013
			bys citycode: gen lnc_ppsq_comm_0513 = ln(ppsq_comm) - ln(ppsq_comm[_n-8]) if year==2013
			
			
			tempfile housing
			save "`housing'"
		
		restore
		
			merge 1:1 year citycode using "`housing'", gen(m_housing)
			drop if m_housing==2
		
	

	
	
	*RREGION CODES
	gen cc=string(citycode)
	gen region= substr(cc,1,1)
	destring region, replace
	drop cc
	gen cc=string(citycode)
	gen prov= substr(cc,1,2)
	destring prov, replace
	drop cc
	
	
	
	
gen sample = 1 if balanced==1 & popwt03!=. 
	egen minsample = min(sample), by(citycode)
	drop sample
	rename minsample sample

	gen exports = dvalue/10000
	
	gen gdppc_true = gdppc*10
	
	gen stud_ot = stud_1 + stud_2 + stud_7 + stud_8
	gen stud_nonfunded = stud_tot - stud_univ_funded
	gen gdp_true = gdp*10000
	gen ln_gdp_true=ln(gdp_true)
	
	foreach x in as ba ma dr ot stem ss arts bus hi_admrate_p0_25 hi_admrate_p25_50 hi_admrate_p50_75 hi_admrate_p75_100 univ_funded nonfunded {
	gen sh_`x' = stud_`x'/stud_tot
	}
	
			
	****5 CONTROL FOR INTERNAL MIGRATION
		
			*format MZ migration data and merge
			preserve
			
				use "$path/Data/1.ChinaData/Migration/Citymigrations0015", clear
				
				reshape wide ratio_skout ratio_unout sharemig_sk sharemig_un, i(citycode) j(year)
				
				foreach x in ratio_skout ratio_unout sharemig_sk sharemig_un {
				
					gen ch_`x'_0015 = `x'2015 - `x'2000
					gen ch_`x'_0515 = `x'2015 - `x'2005
				}
				
				//only retain those cities with all migration rates available
				gen sumcheck = ch_ratio_skout_0015 + ch_ratio_skout_0515 + ch_ratio_unout_0015 + ch_ratio_unout_0515 + ch_sharemig_sk_0015 + ch_sharemig_sk_0515 + ch_sharemig_un_0015 + ch_sharemig_un_0515
				drop if sumcheck==. 
				
				keep citycode ch*
				
				tempfile migration
				save "`migration'"
				
			restore
					
				
				merge m:1 citycode using "`migration'", gen(m_mzmigrate)
				drop if m_mzmigrate==2
				

				**CONSTRUCT CHANGE FROM 2004-2013, STANDARDIZED BY 2013 POP
				tsset citycode year
				foreach lvl in tot {
				bys citycode: gen shc_stud_`lvl'_0413_pop13 = (stud_`lvl' - stud_`lvl'[_n-9]) / (pop) if year==2013
				}
				
				
				
					
*---------------------------------------------------------------------------*			
/****************************************************************************/

*************   REGRESSION ANALYSIS   ***************

******************************************************************************
*----------------------------------------------------------------------------*
//SET CONTROLS:
global controls "contract_cons_exw97 chinatariff00_expw97 inputtariff02_expw97 share_revenue_exw97" 
label var contract_cons_exw97 "Contract Intensity"
label var share_revenue_exw97 "Export License"
label var chinatariff00_expw97 "Import Tariffs"
label var inputtariff02_expw97 "Input Tariffs"
label var ln_chinatariff_expw97 "Annual Import Tariffs"
label var ln_inputtariff_expw97 "Annual Input Tariffs"

**DROP CITY MISSING OCDE
drop if citycode==.	
	
			
		**NEW UHS ANALYSIS RENTS, INCOME PERCENTILES, N HOUSES ETC
		*merge m:1 chinacity using "$path/Data/1.ChinaData/UHS/UHS_d_0207"
		merge m:1 chinacity using "$path/Data/1.ChinaData/UHS/UHS_d_0207_combined"
		drop if _merge==2
		drop _merge
	
		
		
		**NEW SCATTERPLOTS OF EXPENDITURE SHARE ON SERVICES AND BORROWING
		
		egen iv_bin = cut(iv_ntr), group(40)
		egen mean_iv_ntr=mean(iv_ntr), by(iv_bin)
		egen total_pop=total(pop), by(iv_bin)
		
		
		label var d_totservice_share "Growth in Service Expenditure Shares"
		label var d_borrowing_exp "Growth in the Share of Borrowing to Income"

		foreach var in  d_totservice_share d_borrowing_exp {
		egen mean_`var'=mean(`var') if year==2013 & balanced==1, by(iv_bin)
		reg `var' iv_ntr $controls if year==2013 & balanced==1, cluster(citycode)
		loc b =  _b[iv_ntr] // get the beta
		loc t =  _b[iv_ntr]/_se[iv_ntr]  // get the t-statistic
		loc bf: di %12.3f `b'
		loc tf: di %12.3f `t'
		loc p : di %12.3f 2*ttail(e(df_r),abs(`t'))
		twoway (scatter mean_`var' mean_iv_ntr  [weight=total_pop], msymbol(circle_hollow) ) ///
		(lfit `var' iv_ntr ) if year==2013 , legend(on order(- "Coeff: `bf'" "p-value: `p'") pos(11) ring(0))  ///
		xtitle("PNTR{sub:c} (Weighted NTR Gaps)",  size(medlarge)) scheme(s1mono)  ///
		name(`var', replace) title("`: variable label `var''", size(medlarge))
	
		graph export "$path/Figures/fig_`var'_18oct2021.png", replace width(1200)
		drop mean_`var'
		}
		
